import random
import time
from urllib.parse import urljoin

import requests
from lxml import etree

# The crawl starts at this page number and counts down to 1.  This same
# number is the one that maps to the un-suffixed ``index.html`` URL.
START_PAGE = 424

# Seconds to wait for any single HTTP request before giving up, so that a
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10

# Headers sent with every request.
# NOTE(review): the cookie looks like a hard-coded anti-bot clearance token
# and presumably expires -- confirm and refresh it before a long run.
HEADERS = {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
    'cookie':'__jsluid_s=45506c2debfb196ed8d1ccfbcc575c57; __jsl_clearance_s=1763033057.635|0|%2FLyPAMgxm2%2F%2B1JY0dIJ6Y0FwWCw%3D',
}

LIST_ITEM_XPATH = '//ul[@class="list"]/li'
CONTENT_XPATH = '//div[@class="trs_editor_view TRS_UEDITOR trs_paper_default"]'


def build_page_url(page, start_page=START_PAGE):
    """Return the listing URL for *page*.

    The page equal to *start_page* is served as ``index.html``; every other
    page number is embedded in ``index_7603065_<page>.html``.
    """
    if page == start_page:
        return 'https://www.mps.gov.cn/n7598382/index.html'
    return f'https://www.mps.gov.cn/n7598382/index_7603065_{page}.html'


def first_or_default(values, default=''):
    """Return the first element of *values*, or *default* when it is empty."""
    return values[0] if values else default


def parse_html(response):
    """Parse the body of *response* into an lxml tree.

    Returns ``None`` when the body is empty or lxml yields no document, so
    callers can skip the page instead of crashing.
    """
    # Undecodable bytes are replaced rather than raising, so one badly
    # encoded page does not abort the crawl.
    text = response.content.decode('utf-8', errors='replace')
    if not text.strip():
        return None
    return etree.HTML(text)


def crawl_page(page):
    """Fetch one listing page and print a record for each article on it.

    Each record holds the date, title, absolute detail URL and the text of
    the ``<p>`` children of the article's content container.
    """
    url = build_page_url(page)
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f'failed to fetch listing page {page}: {exc}')
        return
    print(response.status_code)
    print(f'-----------开始爬取{page}--------------')

    tree = parse_html(response)
    if tree is None:
        return

    for item in tree.xpath(LIST_ITEM_XPATH):
        href = first_or_default(item.xpath('./a/@href'), None)
        if href is None:
            # A list entry without a link has no detail page to fetch.
            continue
        # Links may be relative to the listing page; resolve them so that
        # requests always receives an absolute URL.
        detail_url = urljoin(url, href)
        try:
            res = requests.get(
                detail_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            print(f'failed to fetch detail page {detail_url}: {exc}')
            continue

        # Parse the *detail* response: the article body lives there, not on
        # the listing page.
        detail_tree = parse_html(res)
        if detail_tree is None:
            continue

        for node in detail_tree.xpath(CONTENT_XPATH):
            content = ''.join(node.xpath('./p/text()')).strip()
            dit = {
                '时间': first_or_default(item.xpath('./span/text()')),
                '标题': first_or_default(item.xpath('./a/text()')),
                '详情URL': detail_url,
                '文本':content,
            }
            print(dit)


def main():
    """Crawl every listing page from START_PAGE down to 1."""
    for page in range(START_PAGE, 0, -1):
        crawl_page(page)
        # Random pause between listing pages to avoid hammering the server.
        time.sleep(random.randint(1,3))


if __name__ == '__main__':
    main()
